/*
* linux/mm/slab.c
* Written by Mark Hemment, 1996/97.
* (markhe@nextd.demon.co.uk)
*
* 11 April '97. Started multi-threading - markhe
* The global cache-chain is protected by the semaphore 'cache_chain_sem'.
* The sem is only needed when accessing/extending the cache-chain, which
* can never happen inside an interrupt (kmem_cache_create(),
* kmem_cache_shrink() and kmem_cache_reap()).
* This is a medium-term exclusion lock.
*
* Each cache has its own lock; 'c_spinlock'. This lock is needed only
* when accessing non-constant members of a cache-struct.
* Note: 'constant members' are assigned a value in kmem_cache_create() before
* the cache is linked into the cache-chain. The values never change, so not
* even a multi-reader lock is needed for these members.
* The c_spinlock is only ever held for a few cycles.
*
* To prevent kmem_cache_shrink() from trying to shrink a 'growing' cache (which
* may be sleeping and therefore not holding the semaphore/lock), the
* c_growing field is used. This also prevents reaping from a cache.
*
* Note, caches can _never_ be destroyed. When a sub-system (e.g. a module) has
* finished with a cache, it can only be shrunk. This leaves the cache empty,
* but already enabled for re-use, e.g. during a module re-load.
*
* Notes:
* o Constructors/destructors are called while the cache-lock
* is _not_ held. Therefore they _must_ be threaded.
* o Constructors must not attempt to allocate memory from the
* same cache that they are a constructor for - infinite loop!
* (There is no easy way to trap this.)
* o The per-cache locks must be obtained with local-interrupts disabled.
* o When compiled with debug support, and an object-verify (upon release)
* is requested for a cache, the verify-function is called with the cache
* lock held. This helps debugging.
* o The functions called from try_to_free_page() must not attempt
* to allocate memory from a cache which is being grown.
* The buffer sub-system might try to allocate memory, via buffer_cachep.
* As this priority is passed to the SLAB, and then (if necessary) onto the
* gfp() funcs (which avoid calling try_to_free_page()), no deadlock
* should happen.
*
* The positioning of the per-cache lock is tricky. If the lock is
* placed on the same h/w cache line as commonly accessed members
* the number of L1 cache-line faults is reduced. However, this can
* lead to the cache-line ping-ponging between processors when the
* lock is in contention (and the common members are being accessed).
* Decided to keep it away from common members.
*
* More fine-graining is possible, with per-slab locks... this might be
* taking fine-graining too far, but it would have the advantage that
* during most allocs/frees no writes occur to the cache-struct.
* Therefore a multi-reader/one-writer lock could be used (the writer
* only being needed when the slab chain is being linked/unlinked).
* As we would not have an exclusion lock for the cache-structure, one
* would be needed per-slab (for updating s_free ptr, and/or the contents
* of s_index).
* The above locking would allow parallel operations to different slabs within
* the same cache with reduced spinning.
*
* Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
* would allow most allocations from the same cache to execute in parallel.
*
* At present, each engine can be growing a cache. This should be blocked.
*
* It is not currently 100% safe to examine the page_struct outside of a kernel
* or global cli lock. The risk is v. small, and non-fatal.
*
* Calls to printk() are not 100% safe (the function is not threaded). However,
* printk() is only used under an error condition, and the risk is v. small (not
* sure if the console write functions 'enjoy' executing multiple contexts in
* parallel. I guess they don't...).
* Note, for most calls to printk() any held cache-lock is dropped. This is not
* always done for text size reasons - having *_unlock() everywhere is bloat.
*/
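/* Illustrative only: a minimal sketch (not part of the original file) of how a
* client sub-system might drive the interface described above. The struct
* 'foo_s', the cache name "foo_cache" and the foo_* functions are hypothetical;
* the calls themselves (kmem_cache_create(), kmem_cache_alloc(),
* kmem_cache_free(), kmem_cache_shrink()) are the ones defined below.
*/
#if 0
struct foo_s {
	int	foo_refcnt;
	void	*foo_data;
};
static kmem_cache_t *foo_cachep;
static int foo_init(void)
{
	/* Create once, typically at sub-system/module init. The name is not
	 * copied, so it must remain valid for the life of the cache. */
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo_s),
				       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	return foo_cachep ? 0 : -1;
}
static struct foo_s *foo_get(void)
{
	return kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
}
static void foo_put(struct foo_s *fp)
{
	kmem_cache_free(foo_cachep, fp);
}
static void foo_exit(void)
{
	/* Caches are never destroyed; shrinking releases the fully free slabs
	 * and leaves the (empty) cache ready for a module re-load. */
	kmem_cache_shrink(foo_cachep);
}
#endif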
/*
* An implementation of the Slab Allocator as described in outline in:
* UNIX Internals: The New Frontiers by Uresh Vahalia
* Pub: Prentice Hall ISBN 0-13-101908-2
* or with a little more detail in:
* The Slab Allocator: An Object-Caching Kernel Memory Allocator
* Jeff Bonwick (Sun Microsystems).
* Presented at: USENIX Summer 1994 Technical Conference
*/
/*
* This implementation deviates from Bonwick's paper as it
* does not use a hash-table for large objects, but rather a per slab
* index to hold the bufctls. This allows the bufctl structure to
* be small (one word), but limits the number of objects a slab (not
* a cache) can contain when off-slab bufctls are used. The limit is the
* size of the largest general cache that does not use off-slab bufctls,
* divided by the size of a bufctl. For 32-bit archs, this is 256/4 = 64.
* This is not serious, as it is only for large objects, when it is unwise
* to have too many per slab.
* Note: This limit can be raised by introducing a general cache whose size
* is less than 512 (PAGE_SIZE>>3 for 4K pages), but greater than 256.
*/
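/* Worked example of the above (assuming 4K pages and 4-byte pointers, i.e. the
* values this comment already uses): the largest general cache that keeps its
* bufctls on-slab is size-256, so the per-slab index for a cache using off-slab
* bufctls must fit in 256 bytes, giving at most 256/sizeof(kmem_bufctl_t) =
* 256/4 = 64 objs per slab. Since off-slab bufctls are only used for objs of
* 512 bytes and up, 64 such objs already need an order-3 (32K) slab, so the
* limit is rarely felt in practice.
*/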
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
/* If there is a different PAGE_SIZE around, and it works with this allocator,
* then change the following.
*/
#if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
#error Your page size is probably not correctly supported - please check
#endif
/* SLAB_MGMT_CHECKS - 1 to enable extra checks in kmem_cache_create().
* 0 if you wish to reduce memory usage.
*
* SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
* SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISON.
* 0 for faster, smaller, code (especially in the critical paths).
*
* SLAB_STATS - 1 to collect stats for /proc/slabinfo.
* 0 for faster, smaller, code (especially in the critical paths).
*
* SLAB_SELFTEST - 1 to perform a few tests, mainly for development.
*/
#define SLAB_MGMT_CHECKS 1
#define SLAB_DEBUG_SUPPORT 0
#define SLAB_STATS 0
#define SLAB_SELFTEST 0
/* Shouldn't this be in a header file somewhere? */
#define BYTES_PER_WORD sizeof(void *)
/* Legal flag mask for kmem_cache_create(). */
#if SLAB_DEBUG_SUPPORT
#if 0
#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
SLAB_HIGH_PACK)
#endif
#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
#else
#if 0
#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
#endif
#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
#endif /* SLAB_DEBUG_SUPPORT */
/* Slab management struct.
* Manages the objs in a slab. Placed either at the end of the mem allocated
* for a slab, or allocated from an internal obj cache (cache_slabp).
* Slabs are chained into a partially ordered list; fully used first, partial
* next, and then fully free slabs.
* The first 4 members are referenced during an alloc/free operation, and
* should always appear on the same cache line.
* Note: The offset between some members _must_ match offsets within
* the kmem_cache_t - see kmem_cache_init() for the checks. */
#define SLAB_OFFSET_BITS 16 /* could make this larger for 64bit archs */
typedef struct kmem_slab_s {
struct kmem_bufctl_s *s_freep; /* ptr to first inactive obj in slab */
struct kmem_bufctl_s *s_index;
unsigned long s_magic;
unsigned long s_inuse; /* num of objs active in slab */
struct kmem_slab_s *s_nextp;
struct kmem_slab_s *s_prevp;
void *s_mem; /* addr of first obj in slab */
unsigned long s_offset:SLAB_OFFSET_BITS,
s_dma:1;
} kmem_slab_t;
/* When the slab management is on-slab, this gives the size to use. */
#define slab_align_size (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
/* Test for end of slab chain. */
#define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_offset))
/* s_magic */
#define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* slab is alive */
#define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* slab has been destroyed */
/* Bufctl's are used for linking objs within a slab, identifying what slab an obj
* is in, and holding the address of the associated obj (for sanity checking with off-slab
* bufctls). What a bufctl contains depends upon the state of the obj and
* the organisation of the cache.
*/
typedef struct kmem_bufctl_s {
union {
struct kmem_bufctl_s *buf_nextp;
kmem_slab_t *buf_slabp; /* slab for obj */
void * buf_objp;
} u;
} kmem_bufctl_t;
/* ...shorthand... */
#define buf_nextp u.buf_nextp
#define buf_slabp u.buf_slabp
#define buf_objp u.buf_objp
#if SLAB_DEBUG_SUPPORT
/* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */
#define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
/* ...and for poisoning */
#define SLAB_POISON_BYTE 0x5a /* byte value for poisoning */
#define SLAB_POISON_END 0xa5 /* end-byte of poisoning */
#endif /* SLAB_DEBUG_SUPPORT */
/* Cache struct - manages a cache.
* First four members are commonly referenced during an alloc/free operation.
*/
struct kmem_cache_s {
kmem_slab_t *c_freep; /* first slab w. free objs */
unsigned long c_flags; /* constant flags */
unsigned long c_offset;
unsigned long c_num; /* # of objs per slab */
unsigned long c_magic;
unsigned long c_inuse; /* kept at zero */
kmem_slab_t *c_firstp; /* first slab in chain */
kmem_slab_t *c_lastp; /* last slab in chain */
spinlock_t c_spinlock;
unsigned long c_growing;
unsigned long c_dflags; /* dynamic flags */
size_t c_org_size;
unsigned long c_gfporder; /* order of pgs per slab (2^n) */
void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */
void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* de-constructor func */
unsigned long c_align; /* alignment of objs */
size_t c_colour; /* cache colouring range */
size_t c_colour_next;/* cache colouring */
unsigned long c_failures;
const char *c_name;
struct kmem_cache_s *c_nextp;
kmem_cache_t *c_index_cachep;
#if SLAB_STATS
unsigned long c_num_active;
unsigned long c_num_allocations;
unsigned long c_high_mark;
unsigned long c_grown;
unsigned long c_reaped;
atomic_t c_errors;
#endif /* SLAB_STATS */
};
/* internal c_flags */
#define SLAB_CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
#define SLAB_CFLGS_BUFCTL 0x020000UL /* bufctls in own cache */
#define SLAB_CFLGS_GENERAL 0x080000UL /* a general cache */
/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define SLAB_CFLGS_GROWN 0x000002UL /* don't reap a recently grown */
#define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB)
#define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL)
#define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN)
#if SLAB_STATS
#define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++)
#define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--)
#define SLAB_STATS_INC_ALLOCED(x) ((x)->c_num_allocations++)
#define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++)
#define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++)
#define SLAB_STATS_SET_HIGH(x) do { if ((x)->c_num_active > (x)->c_high_mark) \
(x)->c_high_mark = (x)->c_num_active; \
} while (0)
#define SLAB_STATS_INC_ERR(x) (atomic_inc(&(x)->c_errors))
#else
#define SLAB_STATS_INC_ACTIVE(x)
#define SLAB_STATS_DEC_ACTIVE(x)
#define SLAB_STATS_INC_ALLOCED(x)
#define SLAB_STATS_INC_GROWN(x)
#define SLAB_STATS_INC_REAPED(x)
#define SLAB_STATS_SET_HIGH(x)
#define SLAB_STATS_INC_ERR(x)
#endif /* SLAB_STATS */
#if SLAB_SELFTEST
#if !SLAB_DEBUG_SUPPORT
#error Debug support needed for self-test
#endif
static void kmem_self_test(void);
#endif /* SLAB_SELFTEST */
/* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
#define SLAB_C_MAGIC 0x4F17A36DUL
/* maximum size of an obj (in 2^order pages) */
#define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */
/* maximum num of pages for a slab (prevents large requests to the VM layer) */
#define SLAB_MAX_GFP_ORDER 5 /* 32 pages */
/* the 'preferred' minimum num of objs per slab - maybe less for large objs */
#define SLAB_MIN_OBJS_PER_SLAB 4
/* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
* then the page order must be less than this before trying the next order.
*/
#define SLAB_BREAK_GFP_ORDER_HI 2
#define SLAB_BREAK_GFP_ORDER_LO 1
static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
/* Macros for storing/retrieving the cachep and/or slab from the
* global 'mem_map'. With off-slab bufctls, these are used to find the
* slab an obj belongs to. With kmalloc() and kfree(), these are used
* to find the cache which an obj belongs to.
*/
#define SLAB_SET_PAGE_CACHE(pg, x) ((pg)->next = (struct page *)(x))
#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->next)
#define SLAB_SET_PAGE_SLAB(pg, x) ((pg)->prev = (struct page *)(x))
#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->prev)
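/* For example, kmem_cache_grow() below stores the owning cache and slab into
* every page of a new slab via SLAB_SET_PAGE_CACHE()/SLAB_SET_PAGE_SLAB(), and
* kfree() later recovers the cache of an obj with
* SLAB_GET_PAGE_CACHE(&mem_map[MAP_NR(objp)]).
*/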
/* Size description struct for general caches. */
typedef struct cache_sizes {
size_t cs_size;
kmem_cache_t *cs_cachep;
} cache_sizes_t;
static cache_sizes_t cache_sizes[] = {
#if PAGE_SIZE == 4096
{ 32, NULL},
#endif
{ 64, NULL},
{ 128, NULL},
{ 256, NULL},
{ 512, NULL},
{1024, NULL},
{2048, NULL},
{4096, NULL},
{8192, NULL},
{16384, NULL},
{32768, NULL},
{65536, NULL},
{131072, NULL},
{0, NULL}
};
/* Names for the general caches. Not placed into the sizes struct for
* a good reason; the string ptr is not needed while searching in kmalloc(),
* and would 'get-in-the-way' in the h/w cache.
*/
static char *cache_sizes_name[] = {
#if PAGE_SIZE == 4096
"size-32",
#endif
"size-64",
"size-128",
"size-256",
"size-512",
"size-1024",
"size-2048",
"size-4096",
"size-8192",
"size-16384",
"size-32768",
"size-65536",
"size-131072"
};
/* internal cache of cache description objs */
static kmem_cache_t cache_cache = {
/* freep, flags */ kmem_slab_end(&cache_cache), SLAB_NO_REAP,
/* offset, num */ sizeof(kmem_cache_t), 0,
/* c_magic, c_inuse */ SLAB_C_MAGIC, 0,
/* firstp, lastp */ kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
/* spinlock */ SPIN_LOCK_UNLOCKED,
/* growing */ 0,
/* dflags */ 0,
/* org_size, gfp */ 0, 0,
/* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES,
/* colour, colour_next */ 0, 0,
/* failures */ 0,
/* name */ "kmem_cache",
/* nextp */ &cache_cache,
/* index */ NULL,
};
/* Guard access to the cache-chain. */
static struct semaphore cache_chain_sem;
/* Place maintainer for reaping. */
static kmem_cache_t *clock_searchp = &cache_cache;
/* Internal slab management cache, for when slab management is off-slab. */
static kmem_cache_t *cache_slabp = NULL;
/* Max number of objs-per-slab for caches which use bufctl's.
* Needed to avoid a possible looping condition in kmem_cache_grow().
*/
static unsigned long bufctl_limit = 0;
/* Initialisation - setup the `cache' cache. */
long __init kmem_cache_init(long start, long end)
{
size_t size, i;
#define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x)
#define kmem_slab_diff(a,b) (kmem_slab_offset(a) - kmem_slab_offset(b))
#define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
#define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
/* Sanity checks... */
if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
((kmem_cache_offset(c_lastp) -
((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
kmem_slab_offset(s_prevp)) ||
kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
/* Offsets to the magic are incorrect, either the structures have
* been incorrectly changed, or adjustments are needed for your
* architecture.
*/
panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
/* NOTREACHED */
}
#undef kmem_cache_offset
#undef kmem_cache_diff
#undef kmem_slab_offset
#undef kmem_slab_diff
cache_chain_sem = MUTEX;
size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
size += (L1_CACHE_BYTES-1);
size &= ~(L1_CACHE_BYTES-1);
cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
cache_cache.c_num = i / size; /* num of objs per slab */
/* Cache colouring. */
cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
cache_cache.c_colour_next = cache_cache.c_colour;
/*
* Fragmentation resistance on low memory - only use bigger
* page orders on machines with more than 32MB of memory.
*/
if (num_physpages > (32 << 20) >> PAGE_SHIFT)
slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
return start;
}
/* Initialisation - setup remaining internal and general caches.
* Called after the gfp() functions have been enabled, and before smp_init().
*/
void __init kmem_cache_sizes_init(void)
{
unsigned int found = 0;
cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
if (cache_slabp) {
char **names = cache_sizes_name;
cache_sizes_t *sizes = cache_sizes;
do {
/* For performance, all the general caches are L1 aligned.
* This should be particularly beneficial on SMP boxes, as it
* eliminates "false sharing".
* Note: for systems short on memory, removing the alignment will
* allow tighter packing of the smaller caches. */
if (!(sizes->cs_cachep =
kmem_cache_create(*names++, sizes->cs_size,
0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
goto panic_time;
if (!found) {
/* Inc off-slab bufctl limit until the ceiling is hit. */
if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
found++;
else
bufctl_limit =
(sizes->cs_size/sizeof(kmem_bufctl_t));
}
sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
sizes++;
} while (sizes->cs_size);
#if SLAB_SELFTEST
kmem_self_test();
#endif /* SLAB_SELFTEST */
return;
}
panic_time:
panic("kmem_cache_sizes_init: Error creating caches");
/* NOTREACHED */
}
/* Interface to the system's page allocator. 'dma' points to a value that is set
* non-zero if all of the allocated memory is DMAable. No need to hold the cache-lock.
*/
static inline void *
kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
{
void *addr;
*dma = flags & SLAB_DMA;
addr = (void*) __get_free_pages(flags, cachep->c_gfporder);
/* Assume that now we have the pages no one else can legally
* mess with the 'struct page's.
* However vm_scan() might try to test the structure to see if
* it is a named-page or buffer-page. The members it tests are
* of no interest here.....
*/
if (!*dma && addr) {
/* Need to check if can dma. */
struct page *page = mem_map + MAP_NR(addr);
*dma = 1<<cachep->c_gfporder;
while ((*dma)--) {
if (!PageDMA(page)) {
*dma = 0;
break;
}
page++;
}
}
return addr;
}
/* Interface to system's page release. */
static inline void
kmem_freepages(kmem_cache_t *cachep, void *addr)
{
unsigned long i = (1<<cachep->c_gfporder);
struct page *page = &mem_map[MAP_NR(addr)];
/* free_pages() does not clear the type bit - we do that.
* The pages have been unlinked from their cache-slab,
* but their 'struct page's might be accessed in
* vm_scan(). Shouldn't be a worry.
*/
while (i--) {
PageClearSlab(page);
page++;
}
free_pages((unsigned long)addr, cachep->c_gfporder);
}
#if SLAB_DEBUG_SUPPORT
static inline void
kmem_poison_obj(kmem_cache_t *cachep, void *addr)
{
memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
*(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISON_END;
}
static inline int
kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
{
void *end;
end = memchr(addr, SLAB_POISON_END, cachep->c_org_size);
if (end != (addr+cachep->c_org_size-1))
return 1;
return 0;
}
#endif /* SLAB_DEBUG_SUPPORT */
/* Three slab chain funcs - all called with ints disabled and the appropriate
* cache-lock held.
*/
static inline void
kmem_slab_unlink(kmem_slab_t *slabp)
{
kmem_slab_t *prevp = slabp->s_prevp;
kmem_slab_t *nextp = slabp->s_nextp;
prevp->s_nextp = nextp;
nextp->s_prevp = prevp;
}
static inline void
kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
kmem_slab_t *lastp = cachep->c_lastp;
slabp->s_nextp = kmem_slab_end(cachep);
slabp->s_prevp = lastp;
cachep->c_lastp = slabp;
lastp->s_nextp = slabp;
}
static inline void
kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
kmem_slab_t *nextp = cachep->c_freep;
kmem_slab_t *prevp = nextp->s_prevp;
slabp->s_nextp = nextp;
slabp->s_prevp = prevp;
nextp->s_prevp = slabp;
slabp->s_prevp->s_nextp = slabp;
}
/* Destroy all the objs in a slab, and release the mem back to the system.
* Before calling, the slab must have been unlinked from the cache.
* The cache-lock is not held/needed.
*/
static void
kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
if (cachep->c_dtor
#if SLAB_DEBUG_SUPPORT
|| cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
#endif /*SLAB_DEBUG_SUPPORT*/
) {
/* Doesn't use the bufctl ptrs to find objs. */
unsigned long num = cachep->c_num;
void *objp = slabp->s_mem;
do {
#if SLAB_DEBUG_SUPPORT
if (cachep->c_flags & SLAB_RED_ZONE) {
if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
printk(KERN_ERR "kmem_slab_destroy: "
"Bad front redzone - %s\n",
cachep->c_name);
objp += BYTES_PER_WORD;
if (*((unsigned long*)(objp+cachep->c_org_size)) !=
SLAB_RED_MAGIC1)
printk(KERN_ERR "kmem_slab_destroy: "
"Bad rear redzone - %s\n",
cachep->c_name);
}
if (cachep->c_dtor)
#endif /*SLAB_DEBUG_SUPPORT*/
(cachep->c_dtor)(objp, cachep, 0);
#if SLAB_DEBUG_SUPPORT
else if (cachep->c_flags & SLAB_POISON) {
if (kmem_check_poison_obj(cachep, objp))
printk(KERN_ERR "kmem_slab_destroy: "
"Bad poison - %s\n", cachep->c_name);
}
if (cachep->c_flags & SLAB_RED_ZONE)
objp -= BYTES_PER_WORD;
#endif /* SLAB_DEBUG_SUPPORT */
objp += cachep->c_offset;
if (!slabp->s_index)
objp += sizeof(kmem_bufctl_t);
} while (--num);
}
slabp->s_magic = SLAB_MAGIC_DESTROYED;
if (slabp->s_index)
kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
if (SLAB_OFF_SLAB(cachep->c_flags))
kmem_cache_free(cache_slabp, slabp);
}
/* Calculate the num of objs, wastage, and bytes left over for a given slab size. */
static inline size_t
kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
unsigned long flags, size_t *left_over, unsigned long *num)
{
size_t wastage = PAGE_SIZE<<gfporder;
if (SLAB_OFF_SLAB(flags))
gfporder = 0;
else
gfporder = slab_align_size;
wastage -= gfporder;
*num = wastage / size;
wastage -= (*num * size);
*left_over = wastage;
return (wastage + gfporder + (extra * *num));
}
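/* Worked example (illustrative figures only - slab_align_size depends on
* L1_CACHE_BYTES): assume 4K pages, slab management on-slab with
* slab_align_size taken as 64, size = 132 (a 128-byte obj plus its 4-byte
* on-slab bufctl) and extra = 4. For gfporder 0 the usable space is
* 4096 - 64 = 4032 bytes, so num = 4032/132 = 30 objs,
* left_over = 4032 - 30*132 = 72 bytes, and the returned wastage is
* 72 + 64 + 30*4 = 256 bytes. In kmem_cache_create() below, 256*8 = 2048
* <= 4096, so order 0 is accepted as having acceptable internal fragmentation.
*/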
/* Create a cache:
* Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within an int, but can be interrupted.
* NOTE: The 'name' is assumed to be memory that is _not_ going to disappear.
*/
kmem_cache_t *
kmem_cache_create(const char *name, size_t size, size_t offset,
unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
void (*dtor)(void*, kmem_cache_t *, unsigned long))
{
const char *func_nm= KERN_ERR "kmem_create: ";
kmem_cache_t *searchp;
kmem_cache_t *cachep=NULL;
size_t extra;
size_t left_over;
size_t align;
/* Sanity checks... */
#if SLAB_MGMT_CHECKS
if (!name) {
printk("%sNULL ptr\n", func_nm);
goto opps;
}
if (in_interrupt()) {
printk("%sCalled during int - %s\n", func_nm, name);
goto opps;
}
if (size < BYTES_PER_WORD) {
printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
size = BYTES_PER_WORD;
}
if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
goto opps;
}
if (dtor && !ctor) {
/* Decon, but no con - doesn't make sense */
printk("%sDecon but no con - %s\n", func_nm, name);
goto opps;
}
if (offset < 0 || offset > size) {
printk("%sOffset weird %d - %s\n", func_nm, (int) offset, name);
offset = 0;
}
#if SLAB_DEBUG_SUPPORT
if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
/* No constructor, but initial state check requested */
printk("%sNo con, but init state check requested - %s\n", func_nm, name);
flags &= ~SLAB_DEBUG_INITIAL;
}
if ((flags & SLAB_POISON) && ctor) {
/* request for poisoning, but we can't do that with a constructor */
printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
flags &= ~SLAB_POISON;
}
#if 0
if ((flags & SLAB_HIGH_PACK) && ctor) {
printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
flags &= ~SLAB_HIGH_PACK;
}
if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
printk("%sHigh pack requested, but with poisoning/red-zoning - %s\n",
func_nm, name);
flags &= ~SLAB_HIGH_PACK;
}
#endif
#endif /* SLAB_DEBUG_SUPPORT */
#endif /* SLAB_MGMT_CHECKS */
/* Always check flags; a caller might be expecting debug
* support which isn't available.
*/
if (flags & ~SLAB_C_MASK) {
printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
flags &= SLAB_C_MASK;
}
/* Get cache's description obj. */
cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
if (!cachep)
goto opps;
memset(cachep, 0, sizeof(kmem_cache_t));
/* Check that size is in terms of words. This is needed to avoid
* unaligned accesses for some archs when redzoning is used, and makes
* sure any on-slab bufctl's are also correctly aligned.
*/
if (size & (BYTES_PER_WORD-1)) {
size += (BYTES_PER_WORD-1);
size &= ~(BYTES_PER_WORD-1);
printk("%sForcing size word alignment - %s\n", func_nm, name);
}
cachep->c_org_size = size;
#if SLAB_DEBUG_SUPPORT
if (flags & SLAB_RED_ZONE) {
/* There is no point trying to honour cache alignment when redzoning. */
flags &= ~SLAB_HWCACHE_ALIGN;
size += 2*BYTES_PER_WORD; /* words for redzone */
}
#endif /* SLAB_DEBUG_SUPPORT */
align = BYTES_PER_WORD;
if (flags & SLAB_HWCACHE_ALIGN)
align = L1_CACHE_BYTES;
/* Determine if the slab management and/or bufctls are 'on' or 'off' slab. */
extra = sizeof(kmem_bufctl_t);
if (size < (PAGE_SIZE>>3)) {
/* Size is small(ish). Use packing where bufctl size per
* obj is low, and slab management is on-slab.
*/
#if 0
if ((flags & SLAB_HIGH_PACK)) {
/* Special high packing for small objects
* (mainly for vm_mapping structs, but
* others can use it).
*/
if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
size == L1_CACHE_BYTES) {
/* The bufctl is stored with the object. */
extra = 0;
} else
flags &= ~SLAB_HIGH_PACK;
}
#endif
} else {
/* Size is large, assume best to place the slab management obj
* off-slab (should allow better packing of objs).
*/
flags |= SLAB_CFLGS_OFF_SLAB;
if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
|| size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
/* To avoid waste the bufctls are off-slab... */
flags |= SLAB_CFLGS_BUFCTL;
extra = 0;
} /* else slab management is off-slab, but freelist pointers are on. */
}
size += extra;
if (flags & SLAB_HWCACHE_ALIGN) {
/* Need to adjust size so that objs are cache aligned. */
if (size > (L1_CACHE_BYTES/2)) {
size_t words = size % L1_CACHE_BYTES;
if (words)
size += (L1_CACHE_BYTES-words);
} else {
/* Small obj size, can get at least two per cache line. */
int num_per_line = L1_CACHE_BYTES/size;
left_over = L1_CACHE_BYTES - (num_per_line*size);
if (left_over) {
/* Need to adjust size so objs cache align. */
if (left_over%num_per_line) {
/* Odd num of objs per line - fixup. */
num_per_line--;
left_over += size;
}
size += (left_over/num_per_line);
}
}
} else if (!(size%L1_CACHE_BYTES)) {
/* Size happens to cache align... */
flags |= SLAB_HWCACHE_ALIGN;
align = L1_CACHE_BYTES;
}
/* Calculate the size (in pages) of slabs, and the num of objs per slab.
* This could be made much more intelligent. For now, try to avoid
* using high page-orders for slabs. When the gfp() funcs are more
* friendly towards high-order requests, this should be changed.
*/
do {
size_t wastage;
unsigned int break_flag = 0;
cal_wastage:
wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
flags, &left_over, &cachep->c_num);
if (!cachep->c_num)
goto next;
if (break_flag)
break;
if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
/* Oops, this num of objs will cause problems. */
cachep->c_gfporder--;
break_flag++;
goto cal_wastage;
}
if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
break;
/* Large num of objs is good, but v. large slabs are currently
* bad for the gfp()s.
*/
if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
if (cachep->c_gfporder < slab_break_gfp_order)
goto next;
}
/* Stop caches with small objs having a large num of pages. */
if (left_over <= slab_align_size)
break;
if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
break; /* Acceptable internal fragmentation. */
next:
cachep->c_gfporder++;
} while (1);
/* If the slab has been placed off-slab, and we have enough space then
* move it on-slab. This is at the expense of any extra colouring.
*/
if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
left_over >= slab_align_size) {
flags &= ~SLAB_CFLGS_OFF_SLAB;
left_over -= slab_align_size;
}
/* Offset must be a multiple of the alignment. */
offset += (align-1);
offset &= ~(align-1);
/* Mess around with the offset alignment. */
if (!left_over) {
offset = 0;
} else if (left_over < offset) {
offset = align;
if (flags & SLAB_HWCACHE_ALIGN) {
if (left_over < offset)
offset = 0;
} else {
/* Offset is BYTES_PER_WORD, and left_over is at
* least BYTES_PER_WORD.
*/
if (left_over >= (BYTES_PER_WORD*2)) {
offset >>= 1;
if (left_over >= (BYTES_PER_WORD*4))
offset >>= 1;
}
}
} else if (!offset) {
/* No offset requested, but space enough - give one. */
offset = left_over/align;
if (flags & SLAB_HWCACHE_ALIGN) {
if (offset >= 8) {
/* A large number of colours - use a larger alignment. */
align <<= 1;
}
} else {
if (offset >= 10) {
align <<= 1;
if (offset >= 16)
align <<= 1;
}
}
offset = align;
}
#if 0
printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
#endif
if ((cachep->c_align = (unsigned long) offset))
cachep->c_colour = (left_over/offset);
cachep->c_colour_next = cachep->c_colour;
/* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
if (!SLAB_BUFCTL(flags))
size -= sizeof(kmem_bufctl_t);
else
cachep->c_index_cachep =
kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
cachep->c_offset = (unsigned long) size;
cachep->c_freep = kmem_slab_end(cachep);
cachep->c_firstp = kmem_slab_end(cachep);
cachep->c_lastp = kmem_slab_end(cachep);
cachep->c_flags = flags;
cachep->c_ctor = ctor;
cachep->c_dtor = dtor;
cachep->c_magic = SLAB_C_MAGIC;
cachep->c_name = name; /* Simply point to the name. */
spin_lock_init(&cachep->c_spinlock);
/* Need the semaphore to access the chain. */
down(&cache_chain_sem);
searchp = &cache_cache;
do {
/* The name field is constant - no lock needed. */
if (!strcmp(searchp->c_name, name)) {
printk("%sDup name - %s\n", func_nm, name);
break;
}
searchp = searchp->c_nextp;
} while (searchp != &cache_cache);
/* There is no reason to lock our new cache before we
* link it in - no one knows about it yet...
*/
cachep->c_nextp = cache_cache.c_nextp;
cache_cache.c_nextp = cachep;
up(&cache_chain_sem);
opps:
return cachep;
}
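/* Illustrative only: a sketch (with hypothetical 'bar_s', bar_ctor() and
* "bar_cache") of the constructor contract this allocator expects. A ctor is
* called with the cache-lock _not_ held, must be threaded, must not allocate
* from its own cache, and should honour the flags it is handed:
* SLAB_CTOR_ATOMIC means it must not sleep, and SLAB_CTOR_VERIFY (debug
* builds, SLAB_DEBUG_INITIAL) means it is only being asked to verify an
* already-constructed obj.
*/
#if 0
struct bar_s {
	unsigned long	bar_state;
};
static void bar_ctor(void *objp, kmem_cache_t *cachep, unsigned long flags)
{
	struct bar_s *barp = objp;
	if (flags & SLAB_CTOR_VERIFY) {
		/* Debug path: obj should still be in its constructed state. */
		if (barp->bar_state != 0)
			printk(KERN_ERR "bar_ctor: obj not in ctor state\n");
		return;
	}
	/* SLAB_CTOR_ATOMIC would forbid sleeping here; this ctor never does. */
	barp->bar_state = 0;
}
static kmem_cache_t *bar_cachep;
static void bar_cache_init(void)
{
	bar_cachep = kmem_cache_create("bar_cache", sizeof(struct bar_s),
				       0, SLAB_HWCACHE_ALIGN, bar_ctor, NULL);
}
#endif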
/* Shrink a cache. Releases as many slabs as possible for a cache.
* It is expected this function will be called by a module when it is
* unloaded. The cache is _not_ removed, this creates too many problems and
* the cache-structure does not take up much room. A module should keep its
* cache pointer(s) in unloaded memory, so when reloaded it knows the cache
* is available. To help debugging, a zero exit status indicates all slabs
* were released.
*/
int
kmem_cache_shrink(kmem_cache_t *cachep)
{
kmem_cache_t *searchp;
kmem_slab_t *slabp;
int ret;
if (!cachep) {
printk(KERN_ERR "kmem_shrink: NULL ptr\n");
return 2;
}
if (in_interrupt()) {
printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name);
return 2;
}
/* Find the cache in the chain of caches. */
down(&cache_chain_sem); /* Semaphore is needed. */
searchp = &cache_cache;
for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {
if (searchp->c_nextp != cachep)
continue;
/* Accessing clock_searchp is safe - we hold the mutex. */
if (cachep == clock_searchp)
clock_searchp = cachep->c_nextp;
goto found;
}
up(&cache_chain_sem);
printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
return 2;
found:
/* Release the semaphore before getting the cache-lock. This could
* mean multiple engines are shrinking the cache, but so what.
*/
up(&cache_chain_sem);
spin_lock_irq(&cachep->c_spinlock);
/* If the cache is growing, stop shrinking. */
while (!cachep->c_growing) {
slabp = cachep->c_lastp;
if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
break;
kmem_slab_unlink(slabp);
spin_unlock_irq(&cachep->c_spinlock);
kmem_slab_destroy(cachep, slabp);
spin_lock_irq(&cachep->c_spinlock);
}
ret = 1;
if (cachep->c_lastp == kmem_slab_end(cachep))
ret--; /* Cache is empty. */
spin_unlock_irq(&cachep->c_spinlock);
return ret;
}
/* Get the memory for a slab management obj. */
static inline kmem_slab_t *
kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
{
kmem_slab_t *slabp;
if (SLAB_OFF_SLAB(cachep->c_flags)) {
/* Slab management obj is off-slab. */
slabp = kmem_cache_alloc(cache_slabp, local_flags);
} else {
/* Slab management at end of slab memory, placed so that
* the position is 'coloured'.
*/
void *end;
end = objp + (cachep->c_num * cachep->c_offset);
if (!SLAB_BUFCTL(cachep->c_flags))
end += (cachep->c_num * sizeof(kmem_bufctl_t));
slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
}
if (slabp) {
slabp->s_inuse = 0;
slabp->s_dma = 0;
slabp->s_index = NULL;
}
return slabp;
}
static inline void
kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
unsigned long ctor_flags)
{
kmem_bufctl_t **bufpp = &slabp->s_freep;
unsigned long num = cachep->c_num-1;
do {
#if SLAB_DEBUG_SUPPORT
if (cachep->c_flags & SLAB_RED_ZONE) {
*((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
objp += BYTES_PER_WORD;
*((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
}
#endif /* SLAB_DEBUG_SUPPORT */
/* Constructors are not allowed to allocate memory from the same cache
* which they are a constructor for. Otherwise, deadlock.
* They must also be threaded.
*/
if (cachep->c_ctor)
cachep->c_ctor(objp, cachep, ctor_flags);
#if SLAB_DEBUG_SUPPORT
else if (cachep->c_flags & SLAB_POISON) {
/* need to poison the objs */
kmem_poison_obj(cachep, objp);
}
if (cachep->c_flags & SLAB_RED_ZONE) {
if (*((unsigned long*)(objp+cachep->c_org_size)) !=
SLAB_RED_MAGIC1) {
*((unsigned long*)(objp+cachep->c_org_size)) =
SLAB_RED_MAGIC1;
printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
"after constructor - %s\n", cachep->c_name);
}
objp -= BYTES_PER_WORD;
if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
*((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
printk(KERN_ERR "kmem_init_obj: Bad front redzone "
"after constructor - %s\n", cachep->c_name);
}
}
#endif /* SLAB_DEBUG_SUPPORT */
objp += cachep->c_offset;
if (!slabp->s_index) {
*bufpp = objp;
objp += sizeof(kmem_bufctl_t);
} else
*bufpp = &slabp->s_index[num];
bufpp = &(*bufpp)->buf_nextp;
} while (num--);
*bufpp = NULL;
}
/* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
static int
kmem_cache_grow(kmem_cache_t * cachep, int flags)
{
kmem_slab_t *slabp;
struct page *page;
void *objp;
size_t offset;
unsigned int dma, local_flags;
unsigned long ctor_flags;
unsigned long save_flags;
/* Be lazy and only check for valid flags here,
* keeping it out of the critical path in kmem_cache_alloc().
*/
if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
flags, cachep->c_name);
flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
}
if (flags & SLAB_NO_GROW)
return 0;
/* The test for missing atomic flag is performed here, rather than
* the more obvious place, simply to reduce the critical path length
* in kmem_cache_alloc(). If a caller is slightly mis-behaving they
* will eventually be caught here (where it matters).
*/
if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
cachep->c_name);
flags &= ~SLAB_LEVEL_MASK;
flags |= SLAB_ATOMIC;
}
ctor_flags = SLAB_CTOR_CONSTRUCTOR;
local_flags = (flags & SLAB_LEVEL_MASK);
if (local_flags == SLAB_ATOMIC) {
/* Not allowed to sleep. Need to tell a constructor about
* this - it might need to know...
*/
ctor_flags |= SLAB_CTOR_ATOMIC;
}
/* About to mess with non-constant members - lock. */
spin_lock_irqsave(&cachep->c_spinlock, save_flags);
/* Get colour for the slab, and calculate the next value. */
if (!(offset = cachep->c_colour_next--))
cachep->c_colour_next = cachep->c_colour;
offset *= cachep->c_align;
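/* E.g. with c_colour == 3 and c_align == 32, successive slabs are given
* offsets of 96, 64, 32, 0, 96, ... bytes, so equivalent objs in different
* slabs start on different L1 cache lines. (Illustrative figures.)
*/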
cachep->c_dflags = SLAB_CFLGS_GROWN;
cachep->c_growing++;
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
/* A series of memory allocations for a new slab.
* Neither the cache-chain semaphore nor the cache-lock is
* held, but the incremented c_growing prevents this
* cache from being reaped or shrunk.
* Note: The cache could be selected for reaping in
* kmem_cache_reap(), but when the final test is made the
* growing value will be seen.
*/
/* Get mem for the objs. */
if (!(objp = kmem_getpages(cachep, flags, &dma)))
goto failed;
/* Get slab management. */
if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
goto opps1;
if (dma)
slabp->s_dma = 1;
if (SLAB_BUFCTL(cachep->c_flags)) {
slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
if (!slabp->s_index)
goto opps2;
}
/* Nasty!!!!!! I hope this is OK. */
dma = 1 << cachep->c_gfporder;
page = &mem_map[MAP_NR(objp)];
do {
SLAB_SET_PAGE_CACHE(page, cachep);
SLAB_SET_PAGE_SLAB(page, slabp);
PageSetSlab(page);
page++;
} while (--dma);
slabp->s_offset = offset; /* It will fit... */
objp += offset; /* Address of first object. */
slabp->s_mem = objp;
/* For on-slab bufctls, c_offset is the distance between the start of
* an obj and its related bufctl. For off-slab bufctls, c_offset is
* the distance between objs in the slab.
*/
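/* E.g. (illustrative figures) with on-slab bufctls and a c_offset of 128,
* obj N starts at s_mem + N*(128 + sizeof(kmem_bufctl_t)) and its bufctl sits
* at obj + 128; with off-slab bufctls, obj N starts at s_mem + N*c_offset and
* its bufctl is s_index[N].
*/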
kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
spin_lock_irq(&cachep->c_spinlock);
/* Make slab active. */
slabp->s_magic = SLAB_MAGIC_ALLOC;
kmem_slab_link_end(cachep, slabp);
if (cachep->c_freep == kmem_slab_end(cachep))
cachep->c_freep = slabp;
SLAB_STATS_INC_GROWN(cachep);
cachep->c_failures = 0;
cachep->c_growing--;
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
return 1;
opps2:
if (SLAB_OFF_SLAB(cachep->c_flags))
kmem_cache_free(cache_slabp, slabp);
opps1:
kmem_freepages(cachep, objp);
failed:
spin_lock_irq(&cachep->c_spinlock);
cachep->c_growing--;
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
return 0;
}
static void
kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
{
if (cachep)
SLAB_STATS_INC_ERR(cachep); /* this is atomic */
printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
str, cachep ? cachep->c_name : "unknown");
}
static void
kmem_report_free_err(const char *str, const void *objp, kmem_cache_t * cachep)
{
if (cachep)
SLAB_STATS_INC_ERR(cachep);
printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
str, objp, cachep ? cachep->c_name : "unknown");
}
/* Search for a slab whose objs are suitable for DMA.
* Note: since testing the first free slab (in __kmem_cache_alloc()),
* ints must not have been enabled, or the cache-lock released!
*/
static inline kmem_slab_t *
kmem_cache_search_dma(kmem_cache_t * cachep)
{
kmem_slab_t *slabp = cachep->c_freep->s_nextp;
for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
if (!(slabp->s_dma))
continue;
kmem_slab_unlink(slabp);
kmem_slab_link_free(cachep, slabp);
cachep->c_freep = slabp;
break;
}
return slabp;
}
#if SLAB_DEBUG_SUPPORT
/* Perform extra freeing checks. Currently, this check is only for caches
* that use bufctl structures within the slab. Those which use bufctl's
* from the internal cache have a reasonable check when the address is
* searched for. Called with the cache-lock held.
*/
static void *
kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
kmem_bufctl_t *bufp, void * objp)
{
if (SLAB_BUFCTL(cachep->c_flags))
return objp;
/* Check slab's freelist to see if this obj is there. */
for (; search_bufp; search_bufp = search_bufp->buf_nextp) {
if (search_bufp != bufp)
continue;
return NULL;
}
return objp;
}
#endif /* SLAB_DEBUG_SUPPORT */
/* Called with cache lock held. */
static inline void
kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
if (slabp->s_nextp->s_inuse) {
/* Not at correct position. */
if (cachep->c_freep == slabp)
cachep->c_freep = slabp->s_nextp;
kmem_slab_unlink(slabp);
kmem_slab_link_end(cachep, slabp);
}
}
/* Called with cache lock held. */
static inline void
kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
if (slabp->s_nextp->s_inuse == cachep->c_num) {
kmem_slab_unlink(slabp);
kmem_slab_link_free(cachep, slabp);
}
cachep->c_freep = slabp;
}
/* Returns a ptr to an obj in the given cache. */
static inline void *
__kmem_cache_alloc(kmem_cache_t *cachep, int flags)
{
kmem_slab_t *slabp;
kmem_bufctl_t *bufp;
void *objp;
unsigned long save_flags;
/* Sanity check. */
if (!cachep)
goto nul_ptr;
spin_lock_irqsave(&cachep->c_spinlock, save_flags);
try_again:
/* Get slab alloc is to come from. */
slabp = cachep->c_freep;
/* Magic is a sanity check _and_ says if we need a new slab. */
if (slabp->s_magic != SLAB_MAGIC_ALLOC)
goto alloc_new_slab;
/* DMA requests are 'rare' - keep out of the critical path. */
if (flags & SLAB_DMA)
goto search_dma;
try_again_dma:
SLAB_STATS_INC_ALLOCED(cachep);
SLAB_STATS_INC_ACTIVE(cachep);
SLAB_STATS_SET_HIGH(cachep);
slabp->s_inuse++;
bufp = slabp->s_freep;
slabp->s_freep = bufp->buf_nextp;
if (slabp->s_freep) {
ret_obj:
if (!slabp->s_index) {
bufp->buf_slabp = slabp;
objp = ((void*)bufp) - cachep->c_offset;
finished:
/* The lock is not needed by the red-zone or poison ops, and the
* obj has been removed from the slab. Should be safe to drop
* the lock here.
*/
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
#if SLAB_DEBUG_SUPPORT
if (cachep->c_flags & SLAB_RED_ZONE)
goto red_zone;
ret_red:
if ((cachep->c_flags & SLAB_POISON) && kmem_check_poison_obj(cachep, objp))
kmem_report_alloc_err("Bad poison", cachep);
#endif /* SLAB_DEBUG_SUPPORT */
return objp;
}
/* Update index ptr. */
objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;
bufp->buf_objp = objp;
goto finished;
}
cachep->c_freep = slabp->s_nextp;
goto ret_obj;
#if SLAB_DEBUG_SUPPORT
red_zone:
/* Set alloc red-zone, and check old one. */
if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
kmem_report_alloc_err("Bad front redzone", cachep);
objp += BYTES_PER_WORD;
if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
kmem_report_alloc_err("Bad rear redzone", cachep);
goto ret_red;
#endif /* SLAB_DEBUG_SUPPORT */
search_dma:
if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
goto try_again_dma;
alloc_new_slab:
/* Either out of slabs, or magic number corruption. */
if (slabp == kmem_slab_end(cachep)) {
/* Need a new slab. Release the lock before calling kmem_cache_grow().
* This allows objs to be released back into the cache while growing.
*/
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
if (kmem_cache_grow(cachep, flags)) {
/* Someone may have stolen our objs. Doesn't matter, we'll
* just come back here again.
*/
spin_lock_irq(&cachep->c_spinlock);
goto try_again;
}
/* Couldn't grow, but some objs may have been freed. */
spin_lock_irq(&cachep->c_spinlock);
if (cachep->c_freep != kmem_slab_end(cachep)) {
if ((flags & SLAB_ATOMIC) == 0)
goto try_again;
}
} else {
/* Very serious error - maybe panic() here? */
kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
}
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
err_exit:
return NULL;
nul_ptr:
kmem_report_alloc_err("NULL ptr", NULL);
goto err_exit;
}
/* Release an obj back to its cache. If the obj has a constructed state,
* it should be in this state _before_ it is released.
*/
static inline void
__kmem_cache_free(kmem_cache_t *cachep, const void *objp)
{
kmem_slab_t *slabp;
kmem_bufctl_t *bufp;
unsigned long save_flags;
/* Basic sanity checks. */
if (!cachep || !objp)
goto null_addr;
#if SLAB_DEBUG_SUPPORT
/* A verify func is called without the cache-lock held. */
if (cachep->c_flags & SLAB_DEBUG_INITIAL)
goto init_state_check;
finished_initial:
if (cachep->c_flags & SLAB_RED_ZONE)
goto red_zone;
return_red:
#endif /* SLAB_DEBUG_SUPPORT */
spin_lock_irqsave(&cachep->c_spinlock, save_flags);
if (SLAB_BUFCTL(cachep->c_flags))
goto bufctl;
bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
/* Get slab for the object. */
#if 0
/* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
* Is this worth while? XXX
*/
if (cachep->c_flags & SLAB_HIGH_PACK)
slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
else
#endif
slabp = bufp->buf_slabp;
check_magic:
if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* Sanity check. */
goto bad_slab;
#if SLAB_DEBUG_SUPPORT
if (cachep->c_flags & SLAB_DEBUG_FREE)
goto extra_checks;
passed_extra:
#endif /* SLAB_DEBUG_SUPPORT */
if (slabp->s_inuse) { /* Sanity check. */
SLAB_STATS_DEC_ACTIVE(cachep);
slabp->s_inuse--;
bufp->buf_nextp = slabp->s_freep;
slabp->s_freep = bufp;
if (bufp->buf_nextp) {
if (slabp->s_inuse) {
/* (hopefully) The most common case. */
finished:
#if SLAB_DEBUG_SUPPORT
if (cachep->c_flags & SLAB_POISON) {
if (cachep->c_flags & SLAB_RED_ZONE)
objp += BYTES_PER_WORD;
kmem_poison_obj(cachep, objp);
}
#endif /* SLAB_DEBUG_SUPPORT */
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
return;
}
kmem_cache_full_free(cachep, slabp);
goto finished;
}
kmem_cache_one_free(cachep, slabp);
goto finished;
}
/* Don't add to freelist. */
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
kmem_report_free_err("free with no active objs", objp, cachep);
return;
bufctl:
/* No 'extra' checks are performed for objs stored this way, finding
* the obj is check enough.
*/
slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
bufp = &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
if (bufp->buf_objp == objp)
goto check_magic;
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
return;
#if SLAB_DEBUG_SUPPORT
init_state_check:
/* Need to call the slab's constructor so the
* caller can perform a verify of its state (debugging).
*/
cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
goto finished_initial;
extra_checks:
if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
kmem_report_free_err("Double free detected during checks", objp, cachep);
return;
}
goto passed_extra;
red_zone:
/* We do not hold the cache-lock while checking the red-zone.
*/
objp -= BYTES_PER_WORD;
if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
/* Either write before start of obj, or a double free. */
kmem_report_free_err("Bad front redzone", objp, cachep);
}
if (xchg((unsigned long *)(objp+cachep->c_org_size+BYTES_PER_WORD), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
/* Either write past end of obj, or a double free. */
kmem_report_free_err("Bad rear redzone", objp, cachep);
}
goto return_red;
#endif /* SLAB_DEBUG_SUPPORT */
bad_slab:
/* Slab doesn't contain the correct magic num. */
if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
/* Magic num says this is a destroyed slab. */
kmem_report_free_err("free from inactive slab", objp, cachep);
} else
kmem_report_free_err("Bad obj addr", objp, cachep);
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
#if 1
/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
*(int *) 0 = 0;
#endif
return;
null_addr:
kmem_report_free_err("NULL ptr", objp, cachep);
return;
}
void *
kmem_cache_alloc(kmem_cache_t *cachep, int flags)
{
return __kmem_cache_alloc(cachep, flags);
}
void
kmem_cache_free(kmem_cache_t *cachep, void *objp)
{
__kmem_cache_free(cachep, objp);
}
void *
kmalloc(size_t size, int flags)
{
cache_sizes_t *csizep = cache_sizes;
for (; csizep->cs_size; csizep++) {
if (size > csizep->cs_size)
continue;
return __kmem_cache_alloc(csizep->cs_cachep, flags);
}
printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
return NULL;
}
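/* E.g. (assuming 4K pages) kmalloc(100, GFP_KERNEL) is satisfied from the
* size-128 general cache - the first cache_sizes entry >= 100 - so the caller
* in fact has 128 usable bytes. Requests larger than 131072 run off the end of
* the table and kmalloc() returns NULL after its "Size too large" printk.
*/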
void
kfree(const void *objp)
{
struct page *page;
int nr;
if (!objp)
goto null_ptr;
nr = MAP_NR(objp);
if (nr >= max_mapnr)
goto bad_ptr;
/* Assume we own the page structure - hence no locking.
* If someone is misbehaving (for example, calling us with a bad
* address), then access to the page structure can race with the
* kmem_slab_destroy() code. Need to add a spin_lock to each page
* structure, which would be useful in threading the gfp() functions....
*/
page = &mem_map[nr];
if (PageSlab(page)) {
kmem_cache_t *cachep;
/* Here, we again assume the obj address is good.
* If it isn't, and happens to map onto another
* general cache page which has no active objs, then
* we race.
*/
cachep = SLAB_GET_PAGE_CACHE(page);
if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
__kmem_cache_free(cachep, objp);
return;
}
}
bad_ptr:
printk(KERN_ERR "kfree: Bad obj %p\n", objp);
#if 1
/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
*(int *) 0 = 0;
#endif
null_ptr:
return;
}
void
kfree_s(const void *objp, size_t size)
{
struct page *page;
int nr;
if (!objp)
goto null_ptr;
nr = MAP_NR(objp);
if (nr >= max_mapnr)
goto null_ptr;
/* See comment in kfree() */
page = &mem_map[nr];
if (PageSlab(page)) {
kmem_cache_t *cachep;
/* See comment in kfree() */
cachep = SLAB_GET_PAGE_CACHE(page);
if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
if (size <= cachep->c_org_size) { /* XXX better check */
__kmem_cache_free(cachep, objp);
return;
}
}
}
null_ptr:
printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
return;
}
kmem_cache_t *
kmem_find_general_cachep(size_t size)
{
cache_sizes_t *csizep = cache_sizes;
/* This function could be moved to the header file, and
* made inline so consumers can quickly determine what
* cache pointer they require.
*/
for (; csizep->cs_size; csizep++) {
if (size > csizep->cs_size)
continue;
break;
}
return csizep->cs_cachep;
}
/* Called from try_to_free_page().
* This function _cannot_ be called within an int, but it
* can be interrupted.
*/
void
kmem_cache_reap(int gfp_mask)
{
kmem_slab_t *slabp;
kmem_cache_t *searchp;
kmem_cache_t *best_cachep;
unsigned int scan;
unsigned int reap_level;
if (in_interrupt()) {
printk("kmem_cache_reap() called within int!\n");
return;
}
/* We really need a test semaphore op so we can avoid sleeping when
* !wait is true.
*/
down(&cache_chain_sem);
scan = 10;
reap_level = 0;
best_cachep = NULL;
searchp = clock_searchp;
do {
unsigned int full_free;
unsigned int dma_flag;
/* It's safe to test this without holding the cache-lock. */
if (searchp->c_flags & SLAB_NO_REAP)
goto next;
spin_lock_irq(&searchp->c_spinlock);
if (searchp->c_growing)
goto next_unlock;
if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
goto next_unlock;
}
/* Sanity check for corruption of static values. */
if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
spin_unlock_irq(&searchp->c_spinlock);
printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
goto next;
}
dma_flag = 0;
full_free = 0;
/* Count the fully free slabs. There should not be many,
* since we are holding the cache lock.
*/
slabp = searchp->c_lastp;
while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
slabp = slabp->s_prevp;
full_free++;
if (slabp->s_dma)
dma_flag++;
}
spin_unlock_irq(&searchp->c_spinlock);
if ((gfp_mask & GFP_DMA) && !dma_flag)
goto next;
if (full_free) {
if (full_free >= 10) {
best_cachep = searchp;
break;
}
/* Try to avoid slabs with constructors and/or
* more than one page per slab (as it can be difficult
* to get high orders from gfp()).
*/
if (full_free >= reap_level) {
reap_level = full_free;
best_cachep = searchp;
}
}
goto next;
next_unlock:
spin_unlock_irq(&searchp->c_spinlock);
next:
searchp = searchp->c_nextp;
} while (--scan && searchp != clock_searchp);
clock_searchp = searchp;
up(&cache_chain_sem);
if (!best_cachep) {
/* couldn't find anything to reap */
return;
}
spin_lock_irq(&best_cachep->c_spinlock);
while (!best_cachep->c_growing &&
!(slabp = best_cachep->c_lastp)->s_inuse &&
slabp != kmem_slab_end(best_cachep)) {
if (gfp_mask & GFP_DMA) {
do {
if (slabp->s_dma)
goto good_dma;
slabp = slabp->s_prevp;
} while (!slabp->s_inuse && slabp != kmem_slab_end(best_cachep));
/* Didn't find a DMA slab (there was a free one -
* it must have become active).
*/
goto dma_fail;
good_dma:
}
if (slabp == best_cachep->c_freep)
best_cachep->c_freep = slabp->s_nextp;
kmem_slab_unlink(slabp);
SLAB_STATS_INC_REAPED(best_cachep);
/* Safe to drop the lock. The slab is no longer linked to the
* cache.
*/
spin_unlock_irq(&best_cachep->c_spinlock);
kmem_slab_destroy(best_cachep, slabp);
spin_lock_irq(&best_cachep->c_spinlock);
}
dma_fail:
spin_unlock_irq(&best_cachep->c_spinlock);
return;
}
#if SLAB_SELFTEST
/* A few v. simple tests */
static void
kmem_self_test(void)
{
kmem_cache_t *test_cachep;
printk(KERN_INFO "kmem_test() - start\n");
test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISON, NULL, NULL);
if (test_cachep) {
char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
if (objp) {
/* Write in front and past end, red-zone test. */
*(objp-1) = 1;
*(objp+16) = 1;
kmem_cache_free(test_cachep, objp);
/* Mess up poisoning. */
*objp = 10;
objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
kmem_cache_free(test_cachep, objp);
/* Mess up poisoning (again). */
*objp = 10;
kmem_cache_shrink(test_cachep);
}
}
printk(KERN_INFO "kmem_test() - finished\n");
}
#endif /* SLAB_SELFTEST */
#if defined(CONFIG_PROC_FS)
/* /proc/slabinfo
* cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab
*/
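/* E.g. a non-SLAB_STATS build might produce lines such as (figures
* hypothetical):
*   kmem_cache            42     48
*   size-4096             10     12
* i.e. just cache-name, active objs and total objs; the per-slab and
* statistics columns are only emitted when SLAB_STATS is enabled.
*/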
int
get_slabinfo(char *buf)
{
kmem_cache_t *cachep;
kmem_slab_t *slabp;
unsigned long active_objs;
unsigned long save_flags;
unsigned long num_slabs;
unsigned long num_objs;
int len=0;
#if SLAB_STATS
unsigned long active_slabs;
#endif /* SLAB_STATS */
__save_flags(save_flags);
/* Output format version, so at least we can change it without _too_
* many complaints.
*/
#if SLAB_STATS
len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
#else
len = sprintf(buf, "slabinfo - version: 1.0\n");
#endif /* SLAB_STATS */
down(&cache_chain_sem);
cachep = &cache_cache;
do {
#if SLAB_STATS
active_slabs = 0;
#endif /* SLAB_STATS */
num_slabs = active_objs = 0;
spin_lock_irq(&cachep->c_spinlock);
for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
active_objs += slabp->s_inuse;
num_slabs++;
#if SLAB_STATS
if (slabp->s_inuse)
active_slabs++;
#endif /* SLAB_STATS */
}
num_objs = cachep->c_num*num_slabs;
#if SLAB_STATS
{
unsigned long errors;
unsigned long high = cachep->c_high_mark;
unsigned long grown = cachep->c_grown;
unsigned long reaped = cachep->c_reaped;
unsigned long allocs = cachep->c_num_allocations;
errors = (unsigned long) atomic_read(&cachep->c_errors);
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
len += sprintf(buf+len, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
cachep->c_name, active_objs, num_objs, active_slabs, num_slabs,
(1<<cachep->c_gfporder)*num_slabs,
high, allocs, grown, reaped, errors);
}
#else
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
len += sprintf(buf+len, "%-17s %6lu %6lu\n", cachep->c_name, active_objs, num_objs);
#endif /* SLAB_STATS */
} while ((cachep = cachep->c_nextp) != &cache_cache);
up(&cache_chain_sem);
return len;
}
#endif /* CONFIG_PROC_FS */